// Copyright 2015 Matthew Holt
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package certmagic

import (
	"context"
	"crypto/x509"
	"encoding/json"
	"encoding/pem"
	"errors"
	"fmt"
	"io/fs"
	"path"
	"runtime"
	"strings"
	"time"

	"github.com/mholt/acmez/v3/acme"
	"go.uber.org/zap"
	"golang.org/x/crypto/ocsp"
)

// maintainAssets is a permanently-blocking function
// that loops indefinitely and, on a regular schedule, checks
// certificates for expiration and initiates a renewal of certs
// that are expiring soon. It also updates OCSP stapling. It
// should only be called once per cache. Panics are recovered,
// and if panicCount < 10, the function is called recursively,
// incrementing panicCount each time. Initial invocation should
// start panicCount at 0.
//
// The loop ends when a receive on certCache.stopChan succeeds, at
// which point certCache.doneChan is closed. After a recovered panic
// with panicCount already at 10, the function returns without
// closing doneChan.
func (certCache *Cache) maintainAssets(panicCount int) {
	log := certCache.logger.Named("maintenance")
	log = log.With(zap.String("cache", fmt.Sprintf("%p", certCache)))

	// registered first so it runs last: by the time we restart after a
	// panic, the tickers and context of this invocation are cleaned up
	defer func() {
		if err := recover(); err != nil {
			buf := make([]byte, stackTraceBufferSize)
			buf = buf[:runtime.Stack(buf, false)]
			log.Error("panic", zap.Any("error", err), zap.ByteString("stack", buf))
			if panicCount < 10 {
				certCache.maintainAssets(panicCount + 1)
			}
		}
	}()

	// only read the intervals inside the lock; time.NewTicker panics on
	// a non-positive interval and must not do so while the lock is held
	certCache.optionsMu.RLock()
	renewInterval := certCache.options.RenewCheckInterval
	ocspInterval := certCache.options.OCSPCheckInterval
	certCache.optionsMu.RUnlock()

	// stop the tickers on every exit path, including a panic that leads
	// to a restart, so each restart does not leave a pair of live tickers
	renewalTicker := time.NewTicker(renewInterval)
	defer renewalTicker.Stop()
	ocspTicker := time.NewTicker(ocspInterval)
	defer ocspTicker.Stop()

	log.Info("started background certificate maintenance")

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	for {
		select {
		case <-renewalTicker.C:
			err := certCache.RenewManagedCertificates(ctx)
			if err != nil {
				log.Error("renewing managed certificates", zap.Error(err))
			}
		case <-ocspTicker.C:
			certCache.updateOCSPStaples(ctx)
		case <-certCache.stopChan:
			// tickers are stopped by the deferred calls above
			log.Info("stopped background certificate maintenance")
			close(certCache.doneChan)
			return
		}
	}
}

// RenewManagedCertificates renews managed certificates,
// including ones loaded on-demand. Note that this is done
// automatically on a regular basis; normally you will not
// need to call this. This method assumes non-interactive
// mode (i.e. operating in the background).
//
// The work is done in two phases: a scan of the cache under a read
// lock that only fills queues, followed by the queued operations
// (ARI refresh, reload from storage, renewal, removal) performed
// after the read lock has been released. Failures of individual
// certificates are logged; the returned error is always nil.
func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error {
	log := certCache.logger.Named("maintenance")

	// configs will hold a map of certificate hash to the config
	// to use when managing that certificate
	configs := make(map[string]*Config)

	// we use the queues for a very important reason: to do any and all
	// operations that could require an exclusive write lock outside
	// of the read lock! otherwise we get a deadlock, yikes. in other
	// words, our first iteration through the certificate cache does NOT
	// perform any operations--only queues them--so that more fine-grained
	// write locks may be obtained during the actual operations.
	var renewQueue, reloadQueue, deleteQueue, ariQueue certList

	// the scan runs in its own function so the read lock is released by
	// a defer; if anything in here panics, the lock must not stay held,
	// or every later write lock on the cache would block forever
	func() {
		certCache.mu.RLock()
		defer certCache.mu.RUnlock()

		for certKey, cert := range certCache.cache {
			if !cert.managed {
				continue
			}

			// the list of names on this cert should never be empty... programmer error?
			if len(cert.Names) == 0 {
				log.Warn("certificate has no names; removing from cache", zap.String("cert_key", certKey))
				deleteQueue = append(deleteQueue, cert)
				continue
			}

			// get the config associated with this certificate
			cfg, err := certCache.getConfig(cert)
			if err != nil {
				log.Error("unable to get configuration to manage certificate; unable to renew",
					zap.Strings("identifiers", cert.Names),
					zap.Error(err))
				continue
			}
			if cfg == nil {
				// this is bad if this happens, probably a programmer error (oops);
				// everything below dereferences cfg, so skip this certificate
				log.Error("no configuration associated with certificate; unable to manage",
					zap.Strings("identifiers", cert.Names))
				continue
			}
			if cfg.OnDemand != nil {
				continue
			}

			// ACME-specific: see if ACME Renewal Info (ARI) window needs refreshing
			if !cfg.DisableARI && cert.ari.NeedsRefresh() {
				configs[cert.hash] = cfg
				ariQueue = append(ariQueue, cert)
			}

			// if time is up or expires soon, we need to try to renew it
			if cert.NeedsRenewal(cfg) {
				configs[cert.hash] = cfg

				// see if the certificate in storage has already been renewed, possibly by another
				// instance that didn't coordinate with this one; if so, just load it (this
				// might happen if another instance already renewed it - kinda sloppy but checking disk
				// first is a simple way to possibly drastically reduce rate limit problems)
				storedCertNeedsRenew, err := cfg.managedCertInStorageNeedsRenewal(ctx, cert)
				if err != nil {
					// hmm, weird, but not a big deal, maybe it was deleted or something
					log.Warn("error while checking if stored certificate is also expiring soon",
						zap.Strings("identifiers", cert.Names),
						zap.Error(err))
				} else if !storedCertNeedsRenew {
					// if the certificate does NOT need renewal and there was no error, then we
					// are good to just reload the certificate from storage instead of repeating
					// a likely-unnecessary renewal procedure
					reloadQueue = append(reloadQueue, cert)
					continue
				}

				// the certificate in storage has not been renewed yet, so we will do it
				// NOTE: It is super-important to note that the TLS-ALPN challenge requires
				// a write lock on the cache in order to complete its challenge, so it is extra
				// vital that this renew operation does not happen inside our read lock!
				renewQueue.insert(cert)
			}
		}
	}()

	// Update ARI, and then for any certs where the ARI window changed,
	// be sure to queue them for renewal if necessary
	for _, cert := range ariQueue {
		cfg := configs[cert.hash]
		cert, changed, err := cfg.updateARI(ctx, cert, log)
		if err != nil {
			log.Error("updating ARI", zap.Error(err))
		}
		if changed && cert.NeedsRenewal(cfg) {
			// it's theoretically possible that another instance already got the memo
			// on the changed ARI and even renewed the cert already, and thus doing it
			// here is wasteful, but I have never heard of this happening in reality,
			// so to save some cycles for now I think we'll just queue it for renewal
			// (notice how we use 'insert' to avoid duplicates, in case it was already
			// scheduled for renewal anyway)
			renewQueue.insert(cert)
		}
	}

	// Reload certificates that merely need to be updated in memory
	for _, oldCert := range reloadQueue {
		timeLeft := expiresAt(oldCert.Leaf).Sub(time.Now().UTC())
		log.Info("certificate expires soon, but is already renewed in storage; reloading stored certificate",
			zap.Strings("identifiers", oldCert.Names),
			zap.Duration("remaining", timeLeft))

		cfg := configs[oldCert.hash]

		// crucially, this happens OUTSIDE a lock on the certCache
		_, err := cfg.reloadManagedCertificate(ctx, oldCert)
		if err != nil {
			log.Error("loading renewed certificate",
				zap.Strings("identifiers", oldCert.Names),
				zap.Error(err))
			continue
		}
	}

	// Renewal queue
	for _, oldCert := range renewQueue {
		cfg := configs[oldCert.hash]
		err := certCache.queueRenewalTask(ctx, oldCert, cfg)
		if err != nil {
			log.Error("queueing renewal task",
				zap.Strings("identifiers", oldCert.Names),
				zap.Error(err))
			continue
		}
	}

	// Deletion queue
	certCache.mu.Lock()
	for _, cert := range deleteQueue {
		certCache.removeCertificate(cert)
	}
	certCache.mu.Unlock()

	return nil
}

// queueRenewalTask submits a background job that renews oldCert using cfg
// and, on success, reloads the renewed certificate into the cache. The job
// is keyed by the certificate's first name. The returned error is always nil;
// failures inside the job are reported through the job's own return value.
func (certCache *Cache) queueRenewalTask(ctx context.Context, oldCert Certificate, cfg *Config) error {
	log := certCache.logger.Named("maintenance")

	log.Info("certificate expires soon; queuing for renewal",
		zap.Strings("identifiers", oldCert.Names),
		zap.Duration("remaining", expiresAt(oldCert.Leaf).Sub(time.Now().UTC())))

	// Only one name per managed certificate is supported, so the
	// first name identifies the certificate to renew.
	name := oldCert.Names[0]

	renew := func() error {
		log.Info("attempting certificate renewal",
			zap.Strings("identifiers", oldCert.Names),
			zap.Duration("remaining", expiresAt(oldCert.Leaf).Sub(time.Now().UTC())))

		// perform renewal - crucially, this happens OUTSIDE a lock on certCache
		if err := cfg.RenewCertAsync(ctx, name, false); err != nil {
			if cfg.OnDemand != nil {
				// loaded dynamically, remove dynamically
				certCache.mu.Lock()
				certCache.removeCertificate(oldCert)
				certCache.mu.Unlock()
			}
			return fmt.Errorf("%v %v", oldCert.Names, err)
		}

		// the renewal succeeded; load the new certificate into the
		// in-memory cache so that handshakes start using it
		if _, err := cfg.reloadManagedCertificate(ctx, oldCert); err != nil {
			return ErrNoRetry{fmt.Errorf("%v %v", oldCert.Names, err)}
		}
		return nil
	}

	// queue up this renewal job (is a no-op if already active or queued)
	jm.Submit(cfg.Logger, "renew_"+name, renew)

	return nil
}

// updateOCSPStaples updates the OCSP stapling in all
// eligible, cached certificates.
//
// OCSP maintenance strives to abide the relevant points on
// Ryan Sleevi's recommendations for good OCSP support:
// https://gist.github.com/sleevi/5efe9ef98961ecfb4da8
//
// The cache is scanned under a read lock to decide which staples
// need refreshing and which certificates need replacing; the
// stapling itself, the cache updates and the forced renewals
// all happen after that read lock has been released.
func (certCache *Cache) updateOCSPStaples(ctx context.Context) {
	logger := certCache.logger.Named("maintenance")

	// temporary structures to store updates or tasks
	// so that we can keep our locks short-lived
	type ocspUpdate struct {
		rawBytes []byte
		parsed   *ocsp.Response
	}
	type updateQueueEntry struct {
		cert           Certificate
		certHash       string
		lastNextUpdate time.Time
		cfg            *Config
	}
	type renewQueueEntry struct {
		oldCert Certificate
		cfg     *Config
	}
	updated := make(map[string]ocspUpdate)
	var updateQueue []updateQueueEntry // certs that need a refreshed staple
	var renewQueue []renewQueueEntry   // certs that need to be renewed (due to revocation)

	// obtain brief read lock during our scan to see which staples need updating;
	// the scan runs in its own function so the lock is released by a defer even
	// if something in here panics (a leaked read lock would block all writers)
	func() {
		certCache.mu.RLock()
		defer certCache.mu.RUnlock()

		for certHash, cert := range certCache.cache {
			// no point in updating OCSP for expired or "synthetic" certificates
			if cert.Leaf == nil || cert.Expired() {
				continue
			}
			cfg, err := certCache.getConfig(cert)
			if err != nil {
				logger.Error("unable to get automation config for certificate; maintenance for this certificate will likely fail",
					zap.Strings("identifiers", cert.Names),
					zap.Error(err))
				continue
			}
			// always try to replace revoked certificates, even if OCSP response is still fresh
			if certShouldBeForceRenewed(cert) {
				renewQueue = append(renewQueue, renewQueueEntry{
					oldCert: cert,
					cfg:     cfg,
				})
				continue
			}
			// if the status is not fresh, get a new one
			var lastNextUpdate time.Time
			if cert.ocsp != nil {
				lastNextUpdate = cert.ocsp.NextUpdate
				if cert.ocsp.Status != ocsp.Unknown && freshOCSP(cert.ocsp) {
					// no need to update our staple if still fresh and not Unknown
					continue
				}
			}
			updateQueue = append(updateQueue, updateQueueEntry{cert, certHash, lastNextUpdate, cfg})
		}
	}()

	// perform updates outside of any lock on certCache
	for _, qe := range updateQueue {
		cert := qe.cert
		certHash := qe.certHash
		lastNextUpdate := qe.lastNextUpdate

		if qe.cfg == nil {
			// this is bad if this happens, probably a programmer error (oops)
			logger.Error("no configuration associated with certificate; unable to manage OCSP staples",
				zap.Strings("identifiers", cert.Names))
			continue
		}

		err := stapleOCSP(ctx, qe.cfg.OCSP, qe.cfg.Storage, &cert, nil)
		if err != nil {
			if cert.ocsp != nil {
				// if there was no staple before, that's fine; otherwise we should log the error
				logger.Error("stapling OCSP",
					zap.Strings("identifiers", cert.Names),
					zap.Error(err))
			}
			continue
		}

		// By this point, we've obtained the latest OCSP response.
		// If there was no staple before, or if the response is updated, make
		// sure we apply the update to all names on the certificate if
		// the status is still Good. (Time values are compared with Equal
		// so that only the instant matters, not its representation.)
		if cert.ocsp != nil && cert.ocsp.Status == ocsp.Good && (lastNextUpdate.IsZero() || !lastNextUpdate.Equal(cert.ocsp.NextUpdate)) {
			logger.Info("advancing OCSP staple",
				zap.Strings("identifiers", cert.Names),
				zap.Time("from", lastNextUpdate),
				zap.Time("to", cert.ocsp.NextUpdate))
			updated[certHash] = ocspUpdate{rawBytes: cert.Certificate.OCSPStaple, parsed: cert.ocsp}
		}

		// If the updated staple shows that the certificate was revoked, we should immediately renew it
		if certShouldBeForceRenewed(cert) {
			qe.cfg.emit(ctx, "cert_ocsp_revoked", map[string]any{
				"subjects":    cert.Names,
				"certificate": cert,
				"reason":      cert.ocsp.RevocationReason,
				"revoked_at":  cert.ocsp.RevokedAt,
			})

			renewQueue = append(renewQueue, renewQueueEntry{
				oldCert: cert,
				cfg:     qe.cfg,
			})
		}
	}

	// These write locks should be brief since we have all the info we need now.
	for certKey, update := range updated {
		certCache.mu.Lock()
		if cert, ok := certCache.cache[certKey]; ok {
			cert.ocsp = update.parsed
			cert.Certificate.OCSPStaple = update.rawBytes
			certCache.cache[certKey] = cert
		}
		certCache.mu.Unlock()
	}

	// We attempt to replace any certificates that were revoked.
	// Crucially, this happens OUTSIDE a lock on the certCache.
	for _, renew := range renewQueue {
		if renew.cfg == nil {
			// entries queued during the scan have not been checked for a nil config yet
			logger.Error("no configuration associated with certificate; unable to replace revoked certificate",
				zap.Strings("identifiers", renew.oldCert.Names))
			continue
		}
		_, err := renew.cfg.forceRenew(ctx, logger, renew.oldCert)
		if err != nil {
			// this is a failure to replace a revoked certificate, so report it as an error
			logger.Error("forcefully renewing certificate due to REVOKED status",
				zap.Strings("identifiers", renew.oldCert.Names),
				zap.Error(err))
		}
	}
}

// storageHasNewerARI reports whether the ARI kept in storage for cert should
// replace the ARI the loaded certificate currently carries. When it reports
// true, the stored ARI is returned as the second value; otherwise an empty
// RenewalInfo is returned. A failure to load the stored metadata is returned
// as the error.
func (cfg *Config) storageHasNewerARI(ctx context.Context, cert Certificate) (bool, acme.RenewalInfo, error) {
	var none acme.RenewalInfo

	meta, err := cfg.loadStoredACMECertificateMetadata(ctx, cert)
	if err != nil {
		return false, none, err
	}
	stored := meta.RenewalInfo
	if stored == nil || stored.RetryAfter == nil {
		return false, none, nil
	}

	// The stored ARI wins if it has a window and the loaded one does not,
	// if the loaded one has no RetryAfter at all, or if the stored
	// RetryAfter is later (typically those move forward in time, though
	// that is not guaranteed).
	switch {
	case !cert.ari.HasWindow() && stored.HasWindow():
	case cert.ari.RetryAfter == nil:
	case stored.RetryAfter.After(*cert.ari.RetryAfter):
	default:
		return false, none, nil
	}
	return true, *stored, nil
}

// loadStoredACMECertificateMetadata loads the stored ACME certificate data
// from the cert's sidecar JSON file.
//
// The file is located by the cert's issuer key and first name, decoded as a
// CertificateResource, and its IssuerData is then decoded as an
// acme.Certificate. An error is returned if cert has no names, if the file
// cannot be loaded, or if either decoding step fails; the underlying error
// is wrapped in each case.
func (cfg *Config) loadStoredACMECertificateMetadata(ctx context.Context, cert Certificate) (acme.Certificate, error) {
	// the storage key is derived from the first name, so there must be one
	if len(cert.Names) == 0 {
		return acme.Certificate{}, fmt.Errorf("loading cert metadata: certificate has no names")
	}

	metaBytes, err := cfg.Storage.Load(ctx, StorageKeys.SiteMeta(cert.issuerKey, cert.Names[0]))
	if err != nil {
		return acme.Certificate{}, fmt.Errorf("loading cert metadata: %w", err)
	}

	var certRes CertificateResource
	if err = json.Unmarshal(metaBytes, &certRes); err != nil {
		return acme.Certificate{}, fmt.Errorf("unmarshaling cert metadata: %w", err)
	}

	var acmeCert acme.Certificate
	if err = json.Unmarshal(certRes.IssuerData, &acmeCert); err != nil {
		return acme.Certificate{}, fmt.Errorf("unmarshaling potential ACME issuer metadata: %w", err)
	}

	return acmeCert, nil
}

// updateARI updates the cert's ACME renewal info, first by checking storage for a newer
// one, or getting it from the CA if needed. The updated info is stored in storage and
// updated in the cache. The certificate with the updated ARI is returned. If true is
// returned, the ARI window or selected time has changed, and the caller should check if
// the cert needs to be renewed now, even if there is an error.
//
// This will always try to update ARI without checking if it needs to be refreshed. Call
// NeedsRefresh() on the RenewalInfo first, and only call this if that returns true.
//
// The whole operation is guarded by a storage lock named after the cert's ARI unique
// identifier. If the storage implements TryLocker and the lock is already taken, the
// input cert is returned unchanged with changed == false and a nil error.
func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap.Logger) (updatedCert Certificate, changed bool, err error) {
	logger = logger.With(
		zap.Strings("identifiers", cert.Names),
		zap.String("cert_hash", cert.hash),
		zap.String("ari_unique_id", cert.ari.UniqueIdentifier),
		zap.Time("cert_expiry", cert.Leaf.NotAfter))

	// default result is the input cert; oldARI is kept for comparison at the end
	updatedCert = cert
	oldARI := cert.ari

	// synchronize ARI fetching; see #297
	lockName := "ari_" + cert.ari.UniqueIdentifier
	if _, ok := cfg.Storage.(TryLocker); ok {
		// non-blocking attempt: if the lock is held elsewhere, skip this update
		ok, err := tryAcquireLock(ctx, cfg.Storage, lockName)
		if err != nil {
			return cert, false, fmt.Errorf("unable to obtain ARI lock: %v", err)
		}
		if !ok {
			logger.Debug("attempted to obtain ARI lock but it was already taken")
			return cert, false, nil
		}
	} else if err := acquireLock(ctx, cfg.Storage, lockName); err != nil {
		return cert, false, fmt.Errorf("unable to obtain ARI lock: %v", err)
	}
	defer func() {
		if err := releaseLock(ctx, cfg.Storage, lockName); err != nil {
			logger.Error("unable to release ARI lock", zap.Error(err))
		}
	}()

	// see if the stored value has been refreshed already by another instance
	// (err here is the named result; it is inspected further below)
	gotNewARI, newARI, err := cfg.storageHasNewerARI(ctx, cert)

	// when we're all done, log if something about the schedule is different
	// ("WARN" level because ARI window changing may be a sign of external trouble
	// and we want to draw their attention to a potential explanation URL);
	// this deferred function is also what sets the named result 'changed' for
	// every return below, based on whatever newARI holds at that time
	defer func() {
		changed = !newARI.SameWindow(oldARI)

		if changed {
			logger.Warn("ARI window or selected renewal time changed",
				zap.Time("prev_start", oldARI.SuggestedWindow.Start),
				zap.Time("next_start", newARI.SuggestedWindow.Start),
				zap.Time("prev_end", oldARI.SuggestedWindow.End),
				zap.Time("next_end", newARI.SuggestedWindow.End),
				zap.Time("prev_selected_time", oldARI.SelectedTime),
				zap.Time("next_selected_time", newARI.SelectedTime),
				zap.String("explanation_url", newARI.ExplanationURL))
		}
	}()

	if err == nil && gotNewARI {
		// great, storage has a newer one we can use
		cfg.certCache.mu.Lock()
		var ok bool
		updatedCert, ok = cfg.certCache.cache[cert.hash]
		if !ok {
			// cert is no longer in the cache... why? what's the right thing to do here?
			cfg.certCache.mu.Unlock()
			updatedCert = cert       // return input cert, not an empty one
			updatedCert.ari = newARI // might as well give it the new ARI for the benefit of our caller, but it won't be updated in the cache or in storage
			logger.Warn("loaded newer ARI from storage, but certificate is no longer in cache; newer ARI will be returned to caller, but not persisted in the cache",
				zap.Time("selected_time", newARI.SelectedTime),
				zap.Timep("next_update", newARI.RetryAfter),
				zap.String("explanation_url", newARI.ExplanationURL))
			return
		}
		updatedCert.ari = newARI
		cfg.certCache.cache[cert.hash] = updatedCert
		cfg.certCache.mu.Unlock()
		logger.Info("reloaded ARI with newer one in storage",
			zap.Timep("next_refresh", newARI.RetryAfter),
			zap.Time("renewal_time", newARI.SelectedTime))
		return
	}

	// a storage error is not fatal: log it and fall through to asking the CA
	if err != nil {
		logger.Error("error while checking storage for updated ARI; updating ARI now", zap.Error(err))
	}

	// of the issuers configured, hopefully one of them is the ACME CA we got the cert from
	for _, iss := range cfg.Issuers {
		if ariGetter, ok := iss.(RenewalInfoGetter); ok && iss.IssuerKey() == cert.issuerKey {
			newARI, err = ariGetter.GetRenewalInfo(ctx, cert) // be sure to use existing newARI variable so we can compare against old value in the defer
			if err != nil {
				// could be anything, but a common error might simply be the "wrong" ACME CA
				// (meaning, different from the one that issued the cert, thus the only one
				// that would have any ARI for it) if multiple ACME CAs are configured
				logger.Error("failed updating renewal info from ACME CA",
					zap.String("issuer", iss.IssuerKey()),
					zap.Error(err))
				continue
			}

			// when we get the latest ARI, the acme package will select a time within the window
			// for us; of course, since it's random, it's likely different from the previously-
			// selected time; but if the window doesn't change, there's no need to change the
			// selected time (the acme package doesn't know the previous window to know better)
			// ... so if the window hasn't changed we'll just put back the selected time
			if newARI.SameWindow(oldARI) && !oldARI.SelectedTime.IsZero() {
				newARI.SelectedTime = oldARI.SelectedTime
			}

			// then store the updated ARI (even if the window didn't change, the Retry-After
			// likely did) in cache and storage

			// be sure we get the cert from the cache while inside a lock to avoid logical races
			cfg.certCache.mu.Lock()
			updatedCert, ok = cfg.certCache.cache[cert.hash]
			if !ok {
				// cert is no longer in the cache; this can happen for several reasons (past expiration,
				// rejected by on-demand permission module, random eviction due to full cache, etc), but
				// it probably means we don't have use of this ARI update now, so while we can return it
				// to the caller, we don't persist it anywhere beyond that...
				cfg.certCache.mu.Unlock()
				updatedCert = cert       // return input cert, not an empty one
				updatedCert.ari = newARI // might as well give it the new ARI for the benefit of our caller, but it won't be updated in the cache or in storage
				logger.Warn("obtained ARI update, but certificate no longer in cache; ARI update will be returned to caller, but not stored",
					zap.Time("selected_time", newARI.SelectedTime),
					zap.Timep("next_update", newARI.RetryAfter),
					zap.String("explanation_url", newARI.ExplanationURL))
				return
			}
			updatedCert.ari = newARI
			cfg.certCache.cache[cert.hash] = updatedCert
			cfg.certCache.mu.Unlock()

			// update the ARI value in storage; at this point the cache already holds
			// the new ARI, so the errors below mean only the storage copy is stale
			var certData acme.Certificate
			certData, err = cfg.loadStoredACMECertificateMetadata(ctx, cert)
			if err != nil {
				err = fmt.Errorf("got new ARI from %s, but failed loading stored certificate metadata: %v", iss.IssuerKey(), err)
				return
			}
			certData.RenewalInfo = &newARI
			var certDataBytes, certResBytes []byte
			certDataBytes, err = json.Marshal(certData)
			if err != nil {
				err = fmt.Errorf("got new ARI from %s, but failed marshaling certificate ACME metadata: %v", iss.IssuerKey(), err)
				return
			}
			certResBytes, err = json.MarshalIndent(CertificateResource{
				SANs:       cert.Names,
				IssuerData: certDataBytes,
			}, "", "\t")
			if err != nil {
				err = fmt.Errorf("got new ARI from %s, but could not re-encode certificate metadata: %v", iss.IssuerKey(), err)
				return
			}
			if err = cfg.Storage.Store(ctx, StorageKeys.SiteMeta(cert.issuerKey, cert.Names[0]), certResBytes); err != nil {
				err = fmt.Errorf("got new ARI from %s, but could not store it with certificate metadata: %v", iss.IssuerKey(), err)
				return
			}

			logger.Info("updated and stored ACME renewal information",
				zap.Time("selected_time", newARI.SelectedTime),
				zap.Timep("next_update", newARI.RetryAfter),
				zap.String("explanation_url", newARI.ExplanationURL))

			return
		}
	}

	// no matching issuer could provide ARI (none configured, or every attempt failed)
	err = fmt.Errorf("could not fully update ACME renewal info: either no issuer supporting ARI is configured for certificate, or all such failed (make sure the ACME CA that issued the certificate is configured)")
	return
}

// CleanStorageOptions specifies how to clean up a storage unit.
// It is the options argument of CleanStorage.
type CleanStorageOptions struct {
	// Optional custom logger. If nil, CleanStorage uses a
	// logger named "clean_storage" derived from the default logger.
	Logger *zap.Logger

	// Optional ID of the instance initiating the cleaning.
	// It is written to storage along with the time of the cleaning.
	InstanceID string

	// If set, cleaning will be skipped if it was performed
	// more recently than this interval. A value of zero or
	// less disables this check.
	Interval time.Duration

	// Whether to clean cached OCSP staples.
	OCSPStaples bool

	// Whether to cleanup expired certificates, and if so,
	// how long to let them stay after they've expired.
	ExpiredCerts           bool
	ExpiredCertGracePeriod time.Duration
}

// CleanStorage removes assets which are no longer useful,
// according to opts.
//
// Cleaning is done while holding a storage lock so that only one cleaning
// runs at a time. If opts.Interval is positive and the last recorded
// cleaning is more recent than that, nothing is cleaned and nil is
// returned. Failures while deleting OCSP staples or expired certificates
// are logged rather than returned. After cleaning, the current time and
// opts.InstanceID are recorded in storage.
//
// An error is returned if the lock cannot be acquired, if the record of the
// last cleaning exists but cannot be loaded or decoded, or if the new
// record cannot be encoded or stored.
func CleanStorage(ctx context.Context, storage Storage, opts CleanStorageOptions) error {
	const (
		lockName   = "storage_clean"
		storageKey = "last_clean.json"
	)

	if opts.Logger == nil {
		opts.Logger = defaultLogger.Named("clean_storage")
	}
	opts.Logger = opts.Logger.With(zap.Any("storage", storage))

	// storage cleaning should be globally exclusive
	if err := acquireLock(ctx, storage, lockName); err != nil {
		return fmt.Errorf("unable to acquire %s lock: %w", lockName, err)
	}
	defer func() {
		if err := releaseLock(ctx, storage, lockName); err != nil {
			opts.Logger.Error("unable to release lock", zap.Error(err))
		}
	}()

	// cleaning should not happen more often than the interval
	if opts.Interval > 0 {
		lastCleanBytes, err := storage.Load(ctx, storageKey)
		// a missing record just means no cleaning has been recorded yet
		if !errors.Is(err, fs.ErrNotExist) {
			if err != nil {
				return fmt.Errorf("loading last clean timestamp: %w", err)
			}

			var lastClean lastCleanPayload
			err = json.Unmarshal(lastCleanBytes, &lastClean)
			if err != nil {
				return fmt.Errorf("decoding last clean data: %w", err)
			}

			lastTLSClean := lastClean["tls"]
			if time.Since(lastTLSClean.Timestamp) < opts.Interval {
				// the next cleaning is due one interval after the previous
				// cleaning, not one interval from now
				nextTime := lastTLSClean.Timestamp.Add(opts.Interval)
				opts.Logger.Info("storage cleaning happened too recently; skipping for now",
					zap.String("instance", lastTLSClean.InstanceID),
					zap.Time("try_again", nextTime),
					zap.Duration("try_again_in", time.Until(nextTime)),
				)
				return nil
			}
		}
	}

	opts.Logger.Info("cleaning storage unit")

	if opts.OCSPStaples {
		err := deleteOldOCSPStaples(ctx, storage, opts.Logger)
		if err != nil {
			opts.Logger.Error("deleting old OCSP staples", zap.Error(err))
		}
	}
	if opts.ExpiredCerts {
		err := deleteExpiredCerts(ctx, storage, opts.Logger, opts.ExpiredCertGracePeriod)
		if err != nil {
			opts.Logger.Error("deleting expired certificates", zap.Error(err))
		}
	}
	// TODO: delete stale locks?

	// update the last-clean time
	lastCleanBytes, err := json.Marshal(lastCleanPayload{
		"tls": lastCleaned{
			Timestamp:  time.Now(),
			InstanceID: opts.InstanceID,
		},
	})
	if err != nil {
		return fmt.Errorf("encoding last cleaned info: %w", err)
	}
	if err := storage.Store(ctx, storageKey, lastCleanBytes); err != nil {
		return fmt.Errorf("storing last clean info: %w", err)
	}

	return nil
}

// lastCleanPayload is the JSON document that records when storage was
// last cleaned. CleanStorage reads and writes its "tls" entry.
type lastCleanPayload map[string]lastCleaned

// lastCleaned describes a single cleaning: when it happened and,
// optionally, which instance performed it.
type lastCleaned struct {
	Timestamp  time.Time `json:"timestamp"`
	InstanceID string    `json:"instance_id,omitempty"`
}

// deleteOldOCSPStaples walks the OCSP staples kept in storage and deletes
// those that cannot be parsed or whose NextUpdate time has passed. Problems
// with individual staples are logged and skipped. The only error returned is
// the context's error when ctx is done; a failure to list the staples is
// treated as there being nothing to clean.
func deleteOldOCSPStaples(ctx context.Context, storage Storage, logger *zap.Logger) error {
	stapleKeys, listErr := storage.List(ctx, prefixOCSP, false)
	if listErr != nil {
		// maybe just hasn't been created yet; no big deal
		return nil
	}

	for _, stapleKey := range stapleKeys {
		// if context was cancelled, quit early; otherwise proceed
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		raw, err := storage.Load(ctx, stapleKey)
		if err != nil {
			logger.Error("while deleting old OCSP staples, unable to load staple file", zap.Error(err))
			continue
		}

		// decide whether this staple has to go, and remember why,
		// so a failed deletion can be logged with the right message
		var failMsg string
		resp, err := ocsp.ParseResponse(raw, nil)
		switch {
		case err != nil:
			// contents are invalid
			failMsg = "purging corrupt staple file"
		case time.Now().After(resp.NextUpdate):
			// response has expired
			failMsg = "purging expired staple file"
		default:
			continue
		}

		if err := storage.Delete(ctx, stapleKey); err != nil {
			logger.Error(failMsg, zap.String("storage_key", stapleKey), zap.Error(err))
		}
	}

	return nil
}

// deleteExpiredCerts walks the certificates kept in storage (issuer, then
// site, then the assets of each site) and deletes every certificate that has
// been expired for at least gracePeriod, together with its ".key" and ".json"
// companions. A site folder that is empty afterwards is deleted as well.
//
// Listing failures are logged (or ignored) and skipped. An error is returned
// when ctx is done, when a ".crt" asset cannot be loaded or parsed, or when
// an empty site folder cannot be deleted.
func deleteExpiredCerts(ctx context.Context, storage Storage, logger *zap.Logger, gracePeriod time.Duration) error {
	issuers, listErr := storage.List(ctx, prefixCerts, false)
	if listErr != nil {
		// maybe just hasn't been created yet; no big deal
		return nil
	}

	for _, issuer := range issuers {
		sites, err := storage.List(ctx, issuer, false)
		if err != nil {
			logger.Error("listing contents", zap.String("issuer_key", issuer), zap.Error(err))
			continue
		}

		for _, site := range sites {
			// if context was cancelled, quit early; otherwise proceed
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}

			assets, err := storage.List(ctx, site, false)
			if err != nil {
				logger.Error("listing site contents", zap.String("site_key", site), zap.Error(err))
				continue
			}

			for _, asset := range assets {
				if path.Ext(asset) != ".crt" {
					continue
				}
				if err := deleteCertAssetsIfExpired(ctx, storage, logger, asset, gracePeriod); err != nil {
					return err
				}
			}

			// list again; if nothing is left in the folder, delete it
			remaining, err := storage.List(ctx, site, false)
			if err != nil || len(remaining) > 0 {
				continue
			}
			logger.Info("deleting site folder because key is empty", zap.String("site_key", site))
			if err := storage.Delete(ctx, site); err != nil {
				return fmt.Errorf("deleting empty site folder %s: %v", site, err)
			}
		}
	}

	return nil
}

// deleteCertAssetsIfExpired loads and parses the PEM certificate stored at
// certKey and, if it has been expired for at least gracePeriod, deletes it
// along with its ".key" and ".json" companions. Deletion failures are logged
// only; load and parse failures are returned.
func deleteCertAssetsIfExpired(ctx context.Context, storage Storage, logger *zap.Logger, certKey string, gracePeriod time.Duration) error {
	pemBytes, err := storage.Load(ctx, certKey)
	if err != nil {
		return fmt.Errorf("loading certificate file %s: %v", certKey, err)
	}
	block, _ := pem.Decode(pemBytes)
	if block == nil || block.Type != "CERTIFICATE" {
		return fmt.Errorf("certificate file %s does not contain PEM-encoded certificate", certKey)
	}
	leaf, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return fmt.Errorf("certificate file %s is malformed; error parsing PEM: %v", certKey, err)
	}

	expiredFor := time.Since(expiresAt(leaf))
	if expiredFor < gracePeriod {
		return nil
	}

	logger.Info("certificate expired beyond grace period; cleaning up",
		zap.String("asset_key", certKey),
		zap.Duration("expired_for", expiredFor),
		zap.Duration("grace_period", gracePeriod))

	base := strings.TrimSuffix(certKey, ".crt")
	related := [...]string{certKey, base + ".key", base + ".json"}
	for _, key := range related {
		logger.Info("deleting asset because resource expired", zap.String("asset_key", key))
		if err := storage.Delete(ctx, key); err != nil {
			logger.Error("could not clean up asset related to expired certificate",
				zap.String("base_name", base),
				zap.String("related_asset", key),
				zap.Error(err))
		}
	}
	return nil
}

// forceRenew replaces cert with a newly issued certificate regardless of how
// much lifetime it has left, reloads it into the cache and returns the new
// certificate. It is intended primarily for revoked certificates. If the
// replacement fails, the input cert is returned with the error, and a cert
// whose OCSP status is Revoked is removed from the cache.
//
// This MUST NOT be called while holding the certificate cache's lock
// (cfg.certCache.mu), because the failure path acquires it.
func (cfg *Config) forceRenew(ctx context.Context, logger *zap.Logger, cert Certificate) (Certificate, error) {
	revoked := cert.ocsp != nil && cert.ocsp.Status == ocsp.Revoked

	msg := "forcefully renewing certificate"
	if revoked {
		msg = "OCSP status for managed certificate is REVOKED; attempting to replace with new certificate"
	}
	logger.Warn(msg,
		zap.Strings("identifiers", cert.Names),
		zap.Time("expiration", expiresAt(cert.Leaf)))

	renewName := cert.Names[0]

	// A revocation for key compromise means the old key must not be used
	// again. A renewal reuses the prior key, so in that case the key is
	// moved out of the way and a brand new certificate is obtained instead.
	// Whether storage itself is still safe is beyond what can be known here.
	keyCompromised := cert.ocsp != nil && cert.ocsp.RevocationReason == acme.ReasonKeyCompromise
	if keyCompromised {
		if err := cfg.moveCompromisedPrivateKey(ctx, cert, logger); err != nil {
			logger.Error("could not remove compromised private key from use",
				zap.Strings("identifiers", cert.Names),
				zap.String("issuer", cert.issuerKey),
				zap.Error(err))
		}
	}

	var issueErr error
	if keyCompromised {
		issueErr = cfg.ObtainCertAsync(ctx, renewName)
	} else {
		// the renewal is forced; otherwise it might see that the certificate
		// isn't close to expiring and return, but a replacement certificate
		// is really needed! see issue #4191
		issueErr = cfg.RenewCertAsync(ctx, renewName, true)
	}
	if issueErr == nil {
		return cfg.reloadManagedCertificate(ctx, cert)
	}

	if revoked {
		// probably better to not serve a revoked certificate at all
		logger.Error("unable to obtain new to certificate after OCSP status of REVOKED; removing from cache",
			zap.Strings("identifiers", cert.Names),
			zap.Error(issueErr))
		cfg.certCache.mu.Lock()
		cfg.certCache.removeCertificate(cert)
		cfg.certCache.mu.Unlock()
	}
	return cert, fmt.Errorf("unable to forcefully get new certificate for %v: %w", cert.Names, issueErr)
}

// moveCompromisedPrivateKey moves the private key for cert to a ".compromised" file
// by copying the data to the new file, then deleting the old one.
//
// If the copy cannot be stored, the original key is still deleted as a last
// resort so that it will not be reused; a failure of that deletion is logged.
// The error returned is that of the first step that failed (load, store, or
// the final delete).
func (cfg *Config) moveCompromisedPrivateKey(ctx context.Context, cert Certificate, logger *zap.Logger) error {
	privKeyStorageKey := StorageKeys.SitePrivateKey(cert.issuerKey, cert.Names[0])

	privKeyPEM, err := cfg.Storage.Load(ctx, privKeyStorageKey)
	if err != nil {
		return err
	}

	compromisedPrivKeyStorageKey := privKeyStorageKey + ".compromised"
	err = cfg.Storage.Store(ctx, compromisedPrivKeyStorageKey, privKeyPEM)
	if err != nil {
		// better safe than sorry: as a last resort, try deleting the key so it won't be reused;
		// if even that fails, the compromised key is still in place, which must not go unnoticed
		if delErr := cfg.Storage.Delete(ctx, privKeyStorageKey); delErr != nil {
			logger.Error("could not delete compromised private key after failing to move it",
				zap.String("storage_path", privKeyStorageKey),
				zap.Strings("identifiers", cert.Names),
				zap.String("issuer", cert.issuerKey),
				zap.Error(delErr))
		}
		return err
	}

	err = cfg.Storage.Delete(ctx, privKeyStorageKey)
	if err != nil {
		return err
	}

	logger.Info("removed certificate's compromised private key from use",
		zap.String("storage_path", compromisedPrivKeyStorageKey),
		zap.Strings("identifiers", cert.Names),
		zap.String("issuer", cert.issuerKey))

	return nil
}

// certShouldBeForceRenewed reports whether cert has to be replaced
// right away: it must be managed, have at least one name, and carry
// an OCSP response whose status is Revoked.
func certShouldBeForceRenewed(cert Certificate) bool {
	if !cert.managed || len(cert.Names) == 0 {
		return false
	}
	if cert.ocsp == nil {
		return false
	}
	return cert.ocsp.Status == ocsp.Revoked
}

// certList is a list of certificates.
type certList []Certificate

// insert adds cert to the end of the list unless a certificate
// with the same hash is already present. It scans the whole
// list, so the cost grows linearly with its length.
func (certs *certList) insert(cert Certificate) {
	existing := *certs
	for i := range existing {
		if existing[i].hash == cert.hash {
			return
		}
	}
	*certs = append(existing, cert)
}

const (
	// DefaultRenewCheckInterval is the default period between scans of
	// the certificates for upcoming expiration. A scan is cheap, so it
	// can run fairly often. Keep this below
	// <Minimum Cert Lifetime>*DefaultRenewalWindowRatio/3 so that a
	// certificate gets several chances to be renewed in time.
	DefaultRenewCheckInterval = 10 * time.Minute

	// DefaultRenewalWindowRatio is the fraction of a certificate's
	// lifetime that makes up its renewal window, i.e. the span at the
	// end of the validity period during which the certificate should
	// be renewed. About 1/3 is a safe choice recommended for most
	// certificates.
	DefaultRenewalWindowRatio = 1.0 / 3.0

	// DefaultOCSPCheckInterval is the default period between checks
	// of whether OCSP staples need updating.
	DefaultOCSPCheckInterval = time.Hour
)
